/* 
    Purpose: Using the 1980 Census, this file takes cleaned variables 
             from 5a and calculates average predicted father income 
             (i.e., "income scores") at various levels.

    Note: Income scores for non-working fathers are only made at the 
          preferred level of variation (occ x race x south).

    Creates: Income scores at occ, occ x race, occ x race x south,
             occ x race x region (4), and occ x race x south x edu
             levels. All output files have the prefix 
             "incomescores_fathers1980_"
*/
* Session setup: empty memory, no output paging, move to the Census data folder.
clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

    * Load the cleaned 1980 Census fathers extract produced upstream.
    use "./output/Census1980_fathers_ages30to50.dta", clear

    * Two constant helper variables:
    *   tag    - collapsed (and then discarded) when building the cell templates
    *   number - summed within cells to give the number of observations per cell
    generate tag = 1
    generate number = 1
    
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************
*** TEMPLATES
*******************

* Template 1: occupation
* One row per fatheroccej value found in the data, with no other variables.
tempfile template_byocc
preserve
    * The collapse is only a device to reduce the data to distinct
    * occupation codes; the summed tag is not needed afterwards.
    collapse (rawsum) tag, by(fatheroccej)
    drop tag

    save `template_byocc'
restore

* Template 2: occupation x race
* Every occupation code crossed with race codes 1 and 2.
tempfile template_byocc_byr
preserve
    * Reduce to one row per occupation code.
    collapse (rawsum) tag, by(fatheroccej)
    drop tag

    * expand marks the copies with a 0/1 indicator; shifting it by one
    * turns the indicator into race codes 1 and 2.
    expand 2, generate(race)
    replace race = race + 1

    save `template_byocc_byr'
restore

* Template 3: occupation x race x south
* Every occupation code crossed with race (1, 2) and south_merge (0, 1).
tempfile template_byocc_byr_bys
preserve
    * Reduce to one row per occupation code.
    collapse (rawsum) tag, by(fatheroccej)
    drop tag

    * Race codes 1 and 2 (0/1 expand indicator shifted by one).
    expand 2, generate(race)
    replace race = race + 1

    * south_merge keeps the raw 0/1 expand indicator.
    expand 2, generate(south_merge)

    save `template_byocc_byr_bys'
restore

* Template 4: occupation x race x region (4)
* Every occupation code except 99, crossed with race (1, 2) and
* region_merge (1 to 4).
tempfile template_byocc_byr_byreg
preserve
    * Occupation code 99 gets no score at this level (see the file header
    * note on non-working fathers).
    drop if fatheroccej == 99

    * Reduce to one row per remaining occupation code.
    collapse (rawsum) tag, by(fatheroccej)
    drop tag

    * Race codes 1 and 2 (0/1 expand indicator shifted by one).
    expand 2, generate(race)
    replace race = race + 1

    * Four copies of each occ x race row, numbered 1 to 4 within the cell.
    expand 4
    bysort fatheroccej race: generate region_merge = _n

    save `template_byocc_byr_byreg'
restore

* Template 5: occupation x race x south x edu
* Every occupation code except 99, crossed with race (1, 2),
* south_merge (0, 1) and edu (1 to 5).
tempfile template_byors_byedu
preserve
    * Occupation code 99 gets no score at this level (see the file header
    * note on non-working fathers).
    drop if fatheroccej == 99

    * Reduce to one row per remaining occupation code.
    collapse (rawsum) tag, by(fatheroccej)
    drop tag

    * Race codes 1 and 2 (0/1 expand indicator shifted by one).
    expand 2, generate(race)
    replace race = race + 1

    * south_merge keeps the raw 0/1 expand indicator.
    expand 2, generate(south_merge)

    * Five copies of each occ x race x south row, numbered 1 to 5.
    expand 5
    bysort fatheroccej race south_merge: generate edu = _n

    save `template_byors_byedu'
restore

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

* Income variables that are averaged within each cell.
global income_measures "inctot faminc HHinc"

*******************
*** COLLAPSE 
*******************

* For each level of aggregation: average the income measures within cells,
* then merge onto the matching full template so that empty cells are present
* with a zero observation count. Each result is kept in its own tempfile
* (incomescores_ followed by the level name) for the imputation step.
foreach x in byocc byocc_byr byocc_byr_bys byors_altwgt byocc_byr_byreg byors_byedu {

    * Variables that define a cell at this level
    if "`x'" == "byocc" {
        local cell "fatheroccej"
    }
    else if "`x'" == "byocc_byr" {
        local cell "fatheroccej race"
    }
    else if inlist("`x'", "byocc_byr_bys", "byors_altwgt") {
        local cell "fatheroccej race south_merge"
    }
    else if "`x'" == "byocc_byr_byreg" {
        local cell "fatheroccej race region_merge"
    }
    else if "`x'" == "byors_byedu" {
        local cell "fatheroccej race south_merge edu"
    }

    * Weight: person weight everywhere except the alternative-weight run
    local weight "perwt"
    if "`x'" == "byors_altwgt" local weight "wgt1980_hhsizeadj"

    * Template: the alternative-weight run shares the occ x race x south template
    local tmpl "`x'"
    if "`x'" == "byors_altwgt" local tmpl "byocc_byr_bys"

    preserve

        * Only the region and education levels exclude occupation code 99
        if inlist("`x'", "byocc_byr_byreg", "byors_byedu") drop if fatheroccej == 99
        summarize fatheroccej

        * number is summed with rawsum, i.e. not scaled by the analytic weight
        collapse (rawsum) number (mean) $income_measures [aw=`weight'], by(`cell')

        foreach c of global income_measures {
            rename `c' avg_`c'_1980_`x'
            label variable avg_`c'_1980_`x' "Coarse income score, average, 1980 using `c'"
        }

        tempfile incomescores
        save `incomescores'

        * Start from the full grid of cells and attach the observed averages
        use `template_`tmpl''
        merge 1:1 `cell' using `incomescores', nogenerate

        * Cells absent from the data have no observations
        replace number = 0 if number == .
        label variable number "Number of obs in cell"
        rename number number_1980obs_`x'

        tempfile incomescores_`x'
        save `incomescores_`x''

    restore

}

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
*** IMPUTATIONS
****************
  
* Combine every level into one dataset. The finest level (occ x race x south
* x edu) is loaded first so that coarser levels attach many-to-one.
    use `incomescores_byors_byedu', clear

    * The edu-level file has no rows for occupation 99, so the only
    * unmatched rows allowed here are the occupation 99 cells.
    merge m:1 fatheroccej race south_merge using `incomescores_byocc_byr_bys'
    assert fatheroccej == 99 if _merge != 3
    drop _merge

    * From here on every row must find a match.
    foreach lvl in byors_altwgt byocc_byr byocc {
        if "`lvl'" == "byors_altwgt" local keys "fatheroccej race south_merge"
        if "`lvl'" == "byocc_byr"    local keys "fatheroccej race"
        if "`lvl'" == "byocc"        local keys "fatheroccej"

        merge m:1 `keys' using `incomescores_`lvl''
        assert _merge == 3
        drop _merge
    }

    * Flag the cells that will need an imputed score at each level.
    * A cell is flagged when its average HHinc is missing; occupation 99
    * is never flagged.
    foreach level in byocc byocc_byr byocc_byr_bys byors_altwgt byors_byedu {
        generate flag_1980_`level' = (avg_HHinc_1980_`level' == .) & (fatheroccej != 99)
        label variable flag_1980_`level' "flag 1980 imputed cells at `level' level"
    }

    * Both weighting schemes must leave exactly the same cells empty.
    assert flag_1980_byocc_byr_bys == flag_1980_byors_altwgt


/*   NOTE: Predicted income is never missing at the occ x race 
           level. No need for imputations.  
*/

    * Impute at occ x race x south level
            /*Note: No imputations are necessary 
                    for fatheroccej==99. */

    /* Both weighting schemes must be based on the same cell counts, so only
       one copy of the count is kept. */
    assert number_1980obs_byocc_byr_bys == number_1980obs_byors_altwgt
    drop number_1980obs_byors_altwgt

    /* Within each occupation x south group, race 1 rows come before race 2 rows. */
    sort fatheroccej south_merge race

    /* A cell contributes to the average racial gap only if it has more than
       `threshold' observations. */
    local threshold "1"  
    local number_byr_bys "number_1980obs_byocc_byr_bys"

    /* norm = scores built with the default weight,
       adj  = scores built with the alternative weight */
    foreach it in norm adj {

        if "`it'" == "norm" local inc_byr_bys "avg_inctot_1980_byocc_byr_bys avg_faminc_1980_byocc_byr_bys avg_HHinc_1980_byocc_byr_bys"
        if "`it'" == "adj"  local inc_byr_bys "avg_inctot_1980_byors_altwgt avg_faminc_1980_byors_altwgt avg_HHinc_1980_byors_altwgt"

        foreach c of local inc_byr_bys {             

            /* The data in memory are at the occ x race x south x edu level, so a
               (fatheroccej, south_merge) group holds several rows per race, all
               carrying the same occ x race x south score. The white/black ratio
               is therefore only meaningful on the row where race switches from
               1 to 2; on any other row the previous row has the same race and
               the ratio would be exactly 1, pulling the average gap towards 1.
               Restricting to the switch row gives one ratio per occupation x
               south cell. */
            by fatheroccej south_merge: gen white_black_ratio = `c'[_n-1]/`c'[_n] ///
                if race==2 & race[_n-1]==1 ///
                 & `number_byr_bys'[_n-1]>`threshold' & `number_byr_bys'>`threshold' 

            foreach j in 0 1 {
                
                /*Calculate average racial income gap in nonsouth and 
                  south separately, across occupation cells */
                quietly sum white_black_ratio if south_merge==`j' [aw=`number_byr_bys'] 
                local ratio1 = r(mean)
                display `ratio1'
                
                /* Occupations whose race 2 score is missing in this south category */
                levelsof fatheroccej if race==2 & south_merge==`j' & `c'==., local(occs1)

                /*Missing income for black respondents: rescale white income by 
                  average racial income gap in south or nonsouth, by occupation */
                foreach y in `occs1' {
                    display "occupation: `y'"
                    sum `c' if south_merge==`j' & race==1 & fatheroccej==`y'

                    /* Only impute when a white score exists for this cell. Without
                       the guard an empty white cell leaves r(mean) undefined and the
                       replace below would stop with a syntax error; the downstream
                       asserts still catch any score left missing. */
                    if r(N)>0 replace `c' = `r(mean)' / `ratio1' if south_merge==`j' & race==2 & fatheroccej==`y'  
                }
            }
            drop *ratio* 
        }
    }

** Impute at occ x race x south x edu level
    sort fatheroccej race south_merge edu

    foreach c of global income_measures {

        * Education-cell score relative to the score of its parent
        * occ x race x south cell (missing wherever the education cell is empty)
        generate edu_rel_income = avg_`c'_1980_byors_byedu / avg_`c'_1980_byocc_byr_bys

        * Average of that relative score within each of the five education
        * levels, weighted by the number of observations in the education cell
        generate edu_scale = .
        forvalues i = 1/5 {
            summarize edu_rel_income if edu == `i' [aw=number_1980obs_byors_byedu]
            replace edu_scale = `r(mean)' if edu == `i'
        }

        * Fill empty education cells with the parent cell score scaled by the
        * average premium or penalty of that education level
        replace avg_`c'_1980_byors_byedu = avg_`c'_1980_byocc_byr_bys * edu_scale ///
            if avg_`c'_1980_byors_byedu == . & fatheroccej != 99

        * Every cell outside occupation 99 must now have a score
        assert avg_`c'_1980_byors_byedu != . if fatheroccej != 99

        drop edu_rel_income edu_scale
    }
    
   
******************************************************
* SAVE (all levels except occ x race x region (4))
******************************************************

    * Write one output file per level. The data in memory hold all levels at
    * once, so each file is obtained by keeping a single row per cell together
    * with the variables that belong to that level.
    foreach x in byocc byocc_byr byocc_byr_bys byors_byedu {

        * Variables that identify a cell at this level
        if "`x'" == "byocc" {
            local cell "fatheroccej"
        }
        else if "`x'" == "byocc_byr" {
            local cell "fatheroccej race"
        }
        else if "`x'" == "byocc_byr_bys" {
            local cell "fatheroccej race south_merge"
        }
        else if "`x'" == "byors_byedu" {
            local cell "fatheroccej race south_merge edu"
        }

        preserve

            * Occupation 99 has no score at the education level
            if "`x'" == "byors_byedu" drop if fatheroccej == 99

            * One row per cell
            bysort `cell': keep if _n == 1

            if "`x'" == "byocc_byr_bys" {
                * The occ x race x south file also carries the
                * alternative-weight variables
                keep number_1980obs_`x' `cell' avg*`x' *_altwgt flag*`x'
                assert avg_inctot_1980_`x' != . & avg_inctot_1980_byors_altwgt != .
            }
            else {
                keep number_1980obs_`x' `cell' avg*`x' flag*`x'
                assert avg_inctot_1980_`x' != .
            }

            summarize fatheroccej

            compress
            save ./output/incomescores_fathers1980_`x'.dta, replace

        restore

    }
   
    
* Impute at occ x race x region level

    foreach x in byocc_byr_byreg {
        local cell "fatheroccej race region_merge"

        * Start from the region-level cells and attach the coarser levels.
        use `incomescores_byocc_byr_byreg', clear

        * Region code 3 is mapped to south_merge = 1, every other region to 0
        generate south_merge = (region_merge == 3)

        * The region-level file has no occupation 99 rows, so any row that
        * does not match in these merges must belong to occupation 99.
        foreach lvl in byocc_byr_bys byocc_byr byocc {
            if "`lvl'" == "byocc_byr_bys" local keys "fatheroccej race south_merge"
            if "`lvl'" == "byocc_byr"     local keys "fatheroccej race"
            if "`lvl'" == "byocc"         local keys "fatheroccej"

            merge m:1 `keys' using `incomescores_`lvl''
            assert fatheroccej == 99 if _merge != 3
            drop _merge
        }

        * Flag the cells that need an imputed score at the region level
        * (average HHinc missing); occupation 99 is never flagged.
        generate flag_1980_byocc_byr_byreg = (avg_HHinc_1980_byocc_byr_byreg == .) & (fatheroccej != 99)
        label variable flag_1980_byocc_byr_byreg "flag 1980 imputed cells at byocc_byr_byreg level"

        * Impute
        * Within each occupation x region pair, race 1 sorts before race 2.
        sort fatheroccej region_merge race

        * Cells count towards the average gap only with more than this many observations
        local threshold "1"
        local number_byocc_byr_byreg "number_1980obs_byocc_byr_byreg"
        local inc_byocc_byr_byreg "avg_inctot_1980_byocc_byr_byreg avg_faminc_1980_byocc_byr_byreg avg_HHinc_1980_byocc_byr_byreg "

        foreach c of local inc_byocc_byr_byreg {

            * Ratio of the previous row (race 1) to the current row (race 2)
            * within an occupation x region pair
            by fatheroccej region_merge: generate white_black_ratio = `c'[_n-1]/`c'[_n] ///
                if `number_byocc_byr_byreg'[_n-1]>`threshold' & `number_byocc_byr_byreg'>`threshold'

            forvalues j = 1/4 {

                * Average racial income gap by current region of residence,
                * across occupation cells
                quietly summarize white_black_ratio if region_merge==`j' [aw=`number_byocc_byr_byreg']
                local ratio1 = r(mean)
                display `ratio1'

                * Occupations (other than 99) whose race 2 score is missing in this region
                levelsof fatheroccej if race==2 & region_merge==`j' & `c'==. & fatheroccej!=99, local(occs1)

                * Missing income for black respondents: rescale white income by
                * the average racial income gap of the region, by occupation
                foreach y in `occs1' {
                    display "occupation: `y'"
                    summarize `c' if region_merge==`j' & race==1 & fatheroccej==`y'

                    * Nothing can be imputed when the white cell is empty as well
                    if r(N) > 0 {
                        replace `c' = `r(mean)' / `ratio1' if region_merge==`j' & race==2 & fatheroccej==`y'
                    }
                }

                * Occupation 99 must stay without a region-level score
                assert `c'==. if fatheroccej==99
            }
            drop *ratio*
        }

        * Occupation 99 is not part of the region-level output
        drop if fatheroccej == 99

        bysort `cell': keep if _n == 1
        keep number_1980obs_`x' `cell' avg*`x' flag*`x'

        * Occupation code 65 is the one occupation allowed to keep missing scores
        assert avg_inctot_1980_`x' != . if fatheroccej != 65
        summarize fatheroccej

        compress
        save ./output/incomescores_fathers1980_`x'.dta, replace

    }
